Критерий | Одновыборочный | Двухвыборочный | Двухвыборочный (связанные выборки) |
---|---|---|---|
Знаков | $\times$ | $\times$ | |
Ранговый | $\times$ | $\times$ | $\times$ |
Перестановочный | $\times$ | $\times$ | $\times$ |
В исследовании оценивается эффективность поведенческой терапии для лечения анорексии. Для 50 пациентов известен вес до начала терапии и по её окончании. Была ли терапия эффективной?
In [1]:
import numpy as np
import pandas as pd
import itertools
from scipy import stats
from statsmodels.stats.descriptivestats import sign_test
from statsmodels.stats.weightstats import zconfint
In [2]:
%pylab inline
In [3]:
weight_data = pd.read_csv('weight.txt', sep = '\t', header = 0)
In [4]:
weight_data.head()
Out[4]:
In [5]:
pylab.figure(figsize=(12,4))
pylab.subplot(1,2,1)
pylab.grid()
pylab.hist(weight_data.Before, color = 'r')
pylab.xlabel('Before')
pylab.subplot(1,2,2)
pylab.grid()
pylab.hist(weight_data.After, color = 'b')
pylab.xlabel('After')
pylab.show()
In [6]:
weight_data.describe()
Out[6]:
$H_0\colon$ медианы веса до и после терапии совпадает
$H_1\colon$ медианы веса до и после тепрапии отличаются
In [7]:
print '95%% confidence interval for median weight before therapy: [%f, %f]' % zconfint(weight_data.Before)
In [8]:
print '95%% confidence interval for median weight after therapy: [%f, %f]' % zconfint(weight_data.After)
In [9]:
pylab.hist(weight_data.After - weight_data.Before)
pylab.show()
$H_0\colon$ медианы равны
$H_1\colon$ медианы не равны
In [10]:
print "M: %d, p-value: %f" % sign_test(weight_data.After - weight_data.Before)
In [11]:
stats.wilcoxon(weight_data.After, weight_data.Before)
Out[11]:
In [12]:
stats.wilcoxon(weight_data.After - weight_data.Before)
Out[12]:
$H_0\colon E(X_1 - X_2) = 0$
$H_1\colon E(X_1 - X_2) ≠ 0$
In [13]:
def permutation_t_stat_1sample(sample, mean):
t_stat = sum(map(lambda x: x - mean, sample))
return t_stat
In [14]:
def permutation_zero_distr_1sample(sample, mean, max_permutations = None):
centered_sample = map(lambda x: x - mean, sample)
if max_permutations:
signs_array = set([tuple(x) for x in 2 * np.random.randint(2, size = (max_permutations,
len(sample))) - 1 ])
else:
signs_array = itertools.product([-1, 1], repeat = len(sample))
distr = [sum(centered_sample * np.array(signs)) for signs in signs_array]
return distr
In [15]:
pylab.hist(permutation_zero_distr_1sample(weight_data.After - weight_data.Before, 0.,
max_permutations = 10000))
pylab.show()
In [16]:
def permutation_test(sample, mean, max_permutations = None, alternative = 'two-sided'):
if alternative not in ('two-sided', 'less', 'greater'):
raise ValueError("alternative not recognized\n"
"should be 'two-sided', 'less' or 'greater'")
t_stat = permutation_t_stat_1sample(sample, mean)
zero_distr = permutation_zero_distr_1sample(sample, mean, max_permutations)
if alternative == 'two-sided':
return sum([1. if abs(x) >= abs(t_stat) else 0. for x in zero_distr]) / len(zero_distr)
if alternative == 'less':
return sum([1. if x <= t_stat else 0. for x in zero_distr]) / len(zero_distr)
if alternative == 'greater':
return sum([1. if x >= t_stat else 0. for x in zero_distr]) / len(zero_distr)
In [17]:
print "p-value: %f" % permutation_test(weight_data.After - weight_data.Before, 0.,
max_permutations = 1000)
In [18]:
print "p-value: %f" % permutation_test(weight_data.After - weight_data.Before, 0.,
max_permutations = 50000)